1 SymPortal pre-submission

Submitting ITS2 samples to Symportal

Prior to submission

  • removing reads containing Illumina sequencing adapters
  • retaining only paired reads that match ITS2 primer
# *note: most of this was written by Dr. Carly D. Kenkel

# in Terminal home directory:
# following instructions of installing BBtools from https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/installation-guide/
# 1. download BBMap package, sftp to installation directory
# 2. untar: 
tar -xvzf BBMap_(version).tar.gz
# 3. test package:
cd bbmap
~/bin/bbmap/stats.sh in=~/bin/bbmap/resources/phix174_ill.ref.fa.gz

# my adaptors, which I saved as "adaptors.fasta"
# >forward
# AATGATACGGCGACCAC
# >forwardrc
# GTGGTCGCCGTATCATT
# >reverse
# CAAGCAGAAGACGGCATAC
# >reverserc
# GTATGCCGTCTTCTGCTTG

# primers for ITS2:
# >forward
# GTGAATTGCAGAACTCCGTG
# >reverse
# CCTCCGCTTACTTATATGCTT

# making a sample list based on the first phrase before the underscore in the .fastq name
ls *R1_001.fastq | cut -d '_' -f 1 > samples.list

# cuts off the extra words in the .fastq files
for file in $(cat samples.list); do  mv ${file}_*R1*.fastq ${file}_R1.fastq; mv ${file}_*R2*.fastq ${file}_R2.fastq; done 

# gets rid of reads that still have the adaptor sequence
for file in $(cat samples.list); do ~/bin/bbmap/bbduk.sh in1=${file}_R1.fastq in2=${file}_R2.fastq ref=adaptors.fasta out1=${file}_R1_NoIll.fastq out2=${file}_R2_NoIll.fastq; done &>bbduk_NoIll.log

# only keeping reads that start with the primer
for file in $(cat samples.list); do ~/bin/bbmap/bbduk.sh in1=${file}_R1_NoIll.fastq in2=${file}_R2_NoIll.fastq k=15 restrictleft=21 literal=GTGAATTGCAGAACTCCGTG,CCTCCGCTTACTTATATGCTT outm1=${file}_R1_NoIll_ITS.fastq outu1=${file}_R1_check.fastq outm2=${file}_R2_NoIll_ITS.fastq outu2=${file}_R2_check.fastq; done &>bbduk_ITS.log
# higher k = more reads removed, but can't surpass k=20 or 21

# renamed them to the shorter version again
for file in $(cat samples.list); do  mv ${file}_*R1*.fastq ${file}_R1.fastq; mv ${file}_*R2*.fastq ${file}_R2.fastq; done 

Then transferred all “R1.fastq” & “R2.fastq” files to the folder to be submitted to SymPortal

2 SymPortal ITS2 analysis

Information on Symportal output documents here

Formatting notes:

  • Cleaned up file called 165_20210816_DBV_20210816T102518.profiles.absolute.abund_and_meta.txt to format for phyloseq

2.1 Setup

## Loading required package: lattice
## Loading required package: plyr
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
## The following object is masked from 'package:plyr':
## 
##     mutate
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## This is vegan 2.5-7

2.2 Checking for low read samples

counts <- read.csv('symportal_profile_counts.csv',header=TRUE,row.names=1,check.names=FALSE)

plot(rowSums(counts)) 

#3 that are 0, removing them now: (513, 87, 76)
counts.no0 <- counts[1:93,]

2.3 Phyloseq object - type profiles

Ran once then saved to re-read in later

# import dataframe holding sample information
samdf<-read.csv("mrits_sampledata copy.csv")
head(samdf)
##   Sample            Site X.DNA...ng.ul. island direction     zone site
## 1    101 Moorea NW Inner            304 Moorea        NW Backreef  MNW
## 2    113 Moorea NW Outer            379 Moorea        NW Forereef  MNW
## 3    115 Moorea NW Outer            407 Moorea        NW Forereef  MNW
## 4    116 Moorea NW Outer            670 Moorea        NW Forereef  MNW
## 5    117 Moorea NW Outer            755 Moorea        NW Forereef  MNW
## 6    130 Moorea NW Outer            200 Moorea        NW Forereef  MNW
##   site_zone DNAcon zoox
## 1     MNW-B  304.2    y
## 2     MNW-F  379.0    y
## 3     MNW-F  407.0    y
## 4     MNW-F  670.0    y
## 5     MNW-F  755.0    y
## 6     MNW-F  200.0    y
rownames(samdf) <- samdf$Sample
samdf$sample_full <- paste(samdf$site_zone,samdf$Sample)

# import taxa info
taxa <- read.csv("symportal_taxa.csv",header=TRUE)
rownames(taxa) <- as.factor(taxa$ITS2_type_profile)
mtaxa <- as.matrix(taxa)

# import counts (absolute abundance from its2 type profiles)
mcounts <- as.matrix(counts.no0)

# Construct phyloseq object 
ps <- phyloseq(otu_table(mcounts, taxa_are_rows=FALSE),
               sample_data(samdf),
               tax_table(mtaxa))

ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9 taxa and 93 samples ]
## sample_data() Sample Data:       [ 93 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 9 taxa by 7 taxonomic ranks ]
#saveRDS(ps,file="ps.its2.RDS")

2.4 Phyloseq object - all types

Ran once then saved to re-read in later

counts.all <- read.csv("symportal_allcounts.csv",header=T,row.names=1)

plot(rowSums(counts.all)) 

#3 that are 0, removing them now: (513, 87, 76)
counts.all.no0 <- counts.all[1:93,]

# # import taxa info - haven't done this with the full suite of types yet
# taxa <- read.csv("symportal_taxa.csv",header=TRUE)
# rownames(taxa) <- as.factor(taxa$ITS2_type_profile)
# mtaxa <- as.matrix(taxa)

# import counts (absolute abundance from its2 type profiles)
mcounts.all <- as.matrix(counts.all.no0)

# Construct phyloseq object 
ps.all <- phyloseq(otu_table(mcounts.all, taxa_are_rows=FALSE),
               sample_data(samdf))

ps.all
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 157 taxa and 93 samples ]
## sample_data() Sample Data:       [ 93 samples by 11 sample variables ]
#saveRDS(ps.all,"ps.all.its2.RDS")

2.5 Re-read in data - all types

#ps <- readRDS("ps.its2.RDS")
#ps.all <- readRDS("ps.all.its2.RDS")

2.6 Bar plots

2.6.1 Relative abundance by sample

# first look at data
plot_bar(ps, "Sample", fill="ITS2_type_profile")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

2.6.2 Absolute abundance by site & zone

plot_bar(ps,"ITS2_type_profile", fill="ITS2_type_profile",facet_grid=~site_zone)
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

2.7 Relative abundance bar plots

2.7.1 Type profile by sample

#all samples
#plot_bar(ps,"sample_full")
ps.rel <- transform_sample_counts(ps, function(x) x / sum(x))
plot_bar(ps.rel,"ITS2_type_profile", fill="ITS2_type_profile",facet_grid=~site_zone)
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

gg.bar <- plot_bar(ps.rel,"sample_full",fill="ITS2_type_profile")+
  geom_bar(stat="identity")+#put color back here in aes
  #scale_color_manual(values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
  scale_fill_manual(name="ITS2 type profiles", values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
  theme_cowplot()+
  #theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_x_discrete(breaks=c("MNW-B 70","MNW-F 172","MSE-B 307","MSE-F 614","TNW-B 526","TNW-F 405"),labels=c("MNW-B","MNW-F","MSE-B","MSE-F","TNW-B","TNW-F"))+
  xlab("Site-reef zone")+
  geom_vline(xintercept=14.5)+
  geom_vline(xintercept=30.5)+
  geom_vline(xintercept=46.5)+
  geom_vline(xintercept=62.5)+
  geom_vline(xintercept=77.5)+
  geom_vline(xintercept=93.5)
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
gg.bar

#counted 14 samples for MNW-B
#16 for MNW-F
#16 MSE-B
#16 MSE-F
#15 TNW-B
#16 TNW-F

#ggsave(gg.bar,file="sym.barplot.pdf",h=4,w=11)                        

#want to re-order by similar samples

2.7.2 Type profile by site & zone

ps.sz <- merge_samples(ps, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
plot_bar(ps.rel.sz, fill="ITS2_type_profile")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

ps.mnw <- subset_samples(ps, site=="MNW")
ps.mnw.z <- merge_samples(ps.mnw, "zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.mnw.z.rel <- transform_sample_counts(ps.mnw.z, function(x) x / sum(x))
ps.mse <- subset_samples(ps, site=="MSE")
ps.mse.z <- merge_samples(ps.mse, "zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.mse.z.rel <- transform_sample_counts(ps.mse.z, function(x) x / sum(x))
ps.tnw <- subset_samples(ps, site=="TNW")
ps.tnw.z <- merge_samples(ps.tnw, "zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.tnw.z.rel <- transform_sample_counts(ps.tnw.z, function(x) x / sum(x))

ps.mnw.z <- merge_samples(ps.mnw, "zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.mnw.z.rel <- transform_sample_counts(ps.mnw.z, function(x) x / sum(x))
bar.mnw <- plot_bar(ps.mnw.z.rel, fill="ITS2_type_profile")+
  theme_cowplot()+
  scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
  ylab("Relative abundance")+
  xlab("Reef zone")+
  scale_x_discrete(labels=c("BR","FR"))+
  ggtitle("Mo'orea NW")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.mnw

bar.mse <- plot_bar(ps.mse.z.rel, fill="ITS2_type_profile")+
  theme_cowplot()+
  scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
  ylab("Relative abundance")+
  scale_x_discrete(labels=c("BR","FR"))+
  xlab("Reef zone")+
  ggtitle("Mo'orea SE")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.mse

bar.tnw <- plot_bar(ps.tnw.z.rel, fill="ITS2_type_profile")+
  theme_cowplot()+
  scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
  xlab("Reef zone")+
  ylab("Relative abundance")+
  scale_x_discrete(labels=c("BR","FR"))+
  ggtitle("Tahiti NW")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.tnw

2.7.3 Genus by sample

Not super interesting

ps.clade <- tax_glom(ps, "Clade")
ps.clade.rel <- transform_sample_counts(ps.clade, function(x) x / sum(x))
plot_bar(ps.clade.rel, "sample_full", fill="Clade")+
    geom_bar(aes(color=Clade, fill=Clade), stat="identity", position="stack")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

2.8 PCAs

2.8.1 Type profiles

ps.ord <- ordinate(ps,"PCoA",distance="bray")
plot_ordination(ps, ps.ord, color ="site", shape="zone")+
  geom_point(alpha=0.5)+
  scale_color_manual(values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(values=c(16,15))+
  stat_ellipse(aes(linetype=zone))+
  theme_cowplot()
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

#ggsave(filename="pcoa.types.site.pdf")
#just cladocopium
# ps.c <- subset_taxa(ps,Clade=="C")
# ps.c.no0 <- subset_samples(ps.c,sample_sums(ps.c)!=0)
# 
# ps.ord.c <- ordinate(ps.c.no0,"PCoA",distance="bray")
# plot_ordination(ps.c.no0, ps.ord.c, color ="site")+
#   geom_point(alpha=0.5)
# #doesn't really change between C or C+A

ps.mnw <- subset_samples(ps,site=="MNW")
gg.pc.mnw.types <- plot_ordination(ps.mnw, ordinate(ps.mnw,"PCoA",distance="bray"), color ="zone",shape="zone")+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  xlab("Axis 1 (61.4%)")+
  ylab("Axis 2 (12.2%)")+
  #theme(legend.position="none")+
  ggtitle("Mo'orea NW")
gg.pc.mnw.types
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ps.mse <- subset_samples(ps,site=="MSE")
gg.pc.mse.types <- plot_ordination(ps.mse, ordinate(ps.mse,"PCoA",distance="bray"), color ="zone",shape="zone")+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  ggtitle("Mo'orea SE")+
  xlab("Axis 1 (52.4%)")+
  ylab("Axis 2 (23.5%)")
gg.pc.mse.types
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ps.tnw <- subset_samples(ps,site=="TNW")
gg.pc.tnw.types <- plot_ordination(ps.tnw, ordinate(ps.tnw,"PCoA",distance="bray"), color ="zone",shape="zone")+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  ggtitle("Tahiti NW")+
  xlab("Axis 1 (41.1%)")+
  ylab("Axis 2 (21.5%)")
gg.pc.tnw.types

2.8.2 All types

Reading in coordinate files given by SymPortal: (done on full suite of sequences, not the type profiles)

pcoa.c <- read.csv("symportal_bray_pcoa_cladec.csv")
#pcoa.c <- read.csv("symportal_bray_pcoa_cladec_sqrt.csv")
pcoa.c$Sample <- pcoa.c$sample
pcoa.samdata.c <- merge(pcoa.c,samdf,by="Sample")

2.8.3 Plot - site & zone

#quick raw plot
ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=site))+
  geom_point()

ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=site_zone,shape=site,linetype=zone))+
  geom_point()+
  stat_ellipse()+
  #scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  #scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

2.8.4 Plot - zone summed

ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=zone,shape=zone))+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

2.9 PCAs by site

2.9.1 MNW

pcoa.mnw <- subset(pcoa.samdata.c,site=="MNW")

gg.pc.mnw <- ggplot(pcoa.mnw,aes(x=PC1,y=PC2,color=zone,shape=zone))+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  theme(legend.position="none")+
  ggtitle("Mo'orea NW")+
  xlab(" ")
gg.pc.mnw

2.9.2 MSE

pcoa.mse <- subset(pcoa.samdata.c,site=="MSE")

gg.pc.mse <- ggplot(pcoa.mse,aes(x=PC1,y=PC2,color=zone,shape=zone))+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  ggtitle("Mo'orea SE")+
  theme(legend.position="none")+
  ylab(" ")
gg.pc.mse

2.9.3 TNW

pcoa.tnw <- subset(pcoa.samdata.c,site=="TNW")

gg.pc.tnw <- ggplot(pcoa.tnw,aes(x=PC1,y=PC2,color=zone,shape=zone))+
  geom_point()+
  stat_ellipse()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  theme_cowplot()+
  ggtitle("Tahiti NW")+
  ylab(" ")+
  xlab(" ")
gg.pc.tnw

2.10 Synthesizing results

library(ggpubr)
gg.pca <- ggarrange(gg.pc.mnw.types,gg.pc.mse.types,gg.pc.tnw.types,labels=c("(a)","(b)","(c)"),nrow=1,common.legend=T,legend="right")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
gg.pca

gg.bar <- ggarrange(bar.mnw,bar.mse,bar.tnw,labels=c("(d)","(e)","(f)"),nrow=1,common.legend=TRUE,legend="right")
gg.bar

ggarrange(gg.pca,gg.bar,nrow=2)

#ggsave(file="pca.bar.pdf",height=5.5,width=9.5)

2.11 Normalized results

Thank you to Ryan Eckhart for parts of this: Github here

#BiocManager::install("edgeR", update = FALSE)
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.1.1
## Loading required package: limma
seqs.types <- as.data.frame(ps@otu_table)
seqs.types.t <- t(seqs.types)

its2SeqList = DGEList(counts = seqs.types.t)
head(its2SeqList$samples)
##     group lib.size norm.factors
## 308     1     2306            1
## 300     1     4618            1
## 97      1     2589            1
## 172     1     5651            1
## 627     1     7381            1
## 96      1     2750            1
its2SeqNorm =  calcNormFactors(its2SeqList, method = "TMM")
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
head(its2SeqNorm$samples)
##     group lib.size norm.factors
## 308     1     2306     1.009554
## 300     1     4618     1.009554
## 97      1     2589     1.009554
## 172     1     5651     1.009554
## 627     1     7381     1.009554
## 96      1     2750     1.009554
its2TMM = t(cpm(its2SeqNorm, normalized.lib.sizes = TRUE))

#phyloseq
row.names(samdf) <- samdf$Sample
ps.norm <- phyloseq(otu_table(its2TMM, taxa_are_rows=FALSE),
               sample_data(samdf),
               tax_table(mtaxa))

ps.norm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9 taxa and 93 samples ]
## sample_data() Sample Data:       [ 93 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 9 taxa by 7 taxonomic ranks ]
ps.norm.ord <- ordinate(ps.norm,"PCoA",distance="bray")
plot_ordination(ps.norm, ps.norm.ord, color ="site")+
  geom_point(alpha=0.5)

ps.norm.mnw <- subset_samples(ps.norm,site=="MNW")

plot_ordination(ps.norm.mnw, ordinate(ps.norm.mnw,"PCoA","bray"), color ="zone")+
  geom_point(alpha=0.5)+
  stat_ellipse()
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ps.norm.mse <- subset_samples(ps.norm,site=="MSE")

plot_ordination(ps.norm.mse, ordinate(ps.norm.mse,"PCoA","bray"), color ="zone")+
  geom_point(alpha=0.5)+
  stat_ellipse()
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ps.norm.tnw <- subset_samples(ps.norm,site=="TNW")

plot_ordination(ps.norm.tnw, ordinate(ps.norm.tnw,"PCoA","bray"), color ="zone")+
  geom_point(alpha=0.5)+
  stat_ellipse()

#BiocManager::install("edgeR", update = FALSE)
library(edgeR)

seqs.all <- as.data.frame(ps.all@otu_table)
seqs.all.t <- t(seqs.all)

its2SeqList.all = DGEList(counts = seqs.all.t)
head(its2SeqList.all$samples)

its2SeqNorm.all =  calcNormFactors(its2SeqList.all, method = "TMM")
head(its2SeqNorm.all$samples)
its2TMM.all = t(cpm(its2SeqNorm.all, normalized.lib.sizes = TRUE))

#phyloseq
ps.norm.all <- phyloseq(otu_table(its2TMM.all, taxa_are_rows=FALSE),
               sample_data(samdf))

ps.norm.all

ps.norm.all.ord <- ordinate(ps.norm.all,"PCoA",distance="bray")
plot_ordination(ps.norm.all, ps.norm.all.ord, color ="site")+
  geom_point(alpha=0.5)+
  stat_ellipse()

3 Stats

3.1 New phyloseq object

Fuller data counts from Symportal before type profiles identified

First we purge sequences and transpose the data to work with edgeR

goods = purgeOutliers(its2Seq, count.columns = 4:length(its2Seq), otu.cut = 0.0001, sampleZcut = -5)
its2SeqTransposed = t(goods[, 4:length(goods[1, ])])
its2SeqList = DGEList(counts = its2SeqTransposed)
head(its2SeqList$samples)

Normalize

3.2 Stats - type profiles

library(vegan)
#remotes::install_github("Jtrachsel/funfuns")
library(funfuns)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(edgeR)

Raw

seq.ps <- data.frame(ps@otu_table)
samdf.ps <- data.frame(ps@sam_data)

dist.ps <- vegdist(seq.ps)
bet.ps <- betadisper(dist.ps,samdf.ps$site)
#anova(bet.ps) #ns
permutest(bet.ps,pairwise=TRUE,permutations=999)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     2 0.1579 0.078968 1.1853    999  0.306
## Residuals 90 5.9961 0.066623                     
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##         MNW     MSE   TNW
## MNW         0.65000 0.245
## MSE 0.63305         0.138
## TNW 0.26075 0.13554
plot(bet.ps)

adonis(seq.ps ~ site/zone, data=samdf.ps, permutations=999) #p<001***
## 
## Call:
## adonis(formula = seq.ps ~ site/zone, data = samdf.ps, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2     6.159 3.07961  9.9353 0.16920  0.001 ***
## site:zone  3     3.277 1.09226  3.5238 0.09001  0.001 ***
## Residuals 87    26.967 0.30997         0.74079           
## Total     92    36.403                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps, factors=samdf.ps$site, permutations=999) #tahiti different from other two
##        pairs    F.Model         R2 p.value p.adjusted
## 1 MSE vs MNW  0.8174161 0.01344049   0.445      0.445
## 2 MSE vs TNW 13.6939515 0.18333414   0.001      0.001
## 3 MNW vs TNW 12.1951261 0.17129159   0.001      0.001
#        pairs    F.Model         R2 p.value p.adjusted
# 1 MSE vs MNW  0.8174161 0.01344049   0.445      0.445
# 2 MSE vs TNW 13.6939515 0.18333414   0.001      0.001
# 3 MNW vs TNW 12.1951261 0.17129159   0.001      0.001

Normalized

seq.ps.norm <- data.frame(ps.norm@otu_table)
samdf.ps.norm <- data.frame(ps.norm@sam_data)

dist.ps.norm <- vegdist(seq.ps.norm)
bet.ps.norm <- betadisper(dist.ps.norm,samdf.ps.norm$site)
## Warning in betadisper(dist.ps.norm, samdf.ps.norm$site): some squared distances
## are negative and changed to zero
anova(bet.ps.norm) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Groups     2  0.6538 0.32689  1.8307 0.1662
## Residuals 90 16.0702 0.17856
plot(bet.ps.norm)

adonis(seq.ps.norm ~ site/zone, data=samdf.ps.norm, permutations=999) #p<001***
## 
## Call:
## adonis(formula = seq.ps.norm ~ site/zone, data = samdf.ps.norm,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2     6.880  3.4401 12.0076 0.19694  0.001 ***
## site:zone  3     3.131  1.0437  3.6429 0.08962  0.001 ***
## Residuals 87    24.925  0.2865         0.71344           
## Total     92    34.936                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps.norm, factors=samdf.ps.norm$site, permutations=999) #tahiti different from other two
##        pairs    F.Model         R2 p.value p.adjusted
## 1 MSE vs MNW  0.8634241 0.01418626   0.401      0.401
## 2 MSE vs TNW 16.5137674 0.21304302   0.001      0.001
## 3 MNW vs TNW 14.5285152 0.19759022   0.001      0.001
#        pairs    F.Model         R2 p.value p.adjusted
# 1 MSE vs MNW  0.8174161 0.01344049   0.445      0.445
# 2 MSE vs TNW 13.6939515 0.18333414   0.001      0.001
# 3 MNW vs TNW 12.1951261 0.17129159   0.001      0.001

Relative abundance

seq.ps.rel <- data.frame(ps.rel@otu_table)
samdf.ps.rel <- data.frame(ps.rel@sam_data)

dist.ps.rel <- vegdist(seq.ps.rel)
bet.ps.rel <- betadisper(dist.ps.rel,samdf.ps.rel$site)
## Warning in betadisper(dist.ps.rel, samdf.ps.rel$site): some squared distances
## are negative and changed to zero
anova(bet.ps.rel) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Groups     2  0.6456 0.32280  1.8182 0.1682
## Residuals 90 15.9780 0.17753
plot(bet.ps.rel)

adonis(seq.ps.rel ~ site/zone, data=samdf.ps.rel, permutations=999) #p<001***
## 
## Call:
## adonis(formula = seq.ps.rel ~ site/zone, data = samdf.ps.rel,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2     6.856  3.4280 11.9219 0.19590  0.001 ***
## site:zone  3     3.126  1.0421  3.6243 0.08933  0.001 ***
## Residuals 87    25.016  0.2875         0.71477           
## Total     92    34.998                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps.rel, factors=samdf.ps.rel$site, permutations=999) #tahiti different from other two
##        pairs    F.Model         R2 p.value p.adjusted
## 1 MSE vs MNW  0.8838874 0.01451759   0.412      0.412
## 2 MSE vs TNW 16.5137674 0.21304302   0.001      0.001
## 3 MNW vs TNW 14.3300043 0.19541802   0.001      0.001
#        pairs    F.Model         R2 p.value p.adjusted
# 1 MSE vs MNW  0.8174161 0.01344049   0.445      0.445
# 2 MSE vs TNW 13.6939515 0.18333414   0.001      0.001
# 3 MNW vs TNW 12.1951261 0.17129159   0.001      0.001
ps.mnw <- subset_samples(ps,site=="MNW")
ps.mnw.rel <- subset_samples(ps.rel,site=="MNW")
ps.mnw.norm <- subset_samples(ps.norm,site=="MNW")

ps.mse <- subset_samples(ps,site=="MSE")
ps.mse.rel <- subset_samples(ps.rel,site=="MSE")
ps.mse.norm <- subset_samples(ps.norm,site=="MSE")

ps.tnw <- subset_samples(ps,site=="TNW")
ps.tnw.rel <- subset_samples(ps.rel,site=="TNW")
ps.tnw.norm <- subset_samples(ps.norm,site=="TNW")

Mo’orea NW

seq.ps.mnw <- data.frame(otu_table(ps.mnw))
samdf.ps.mnw <- data.frame(sample_data(ps.mnw))
row.names(seq.ps.mnw)==row.names(samdf.ps.mnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.ps.mnw <- vegdist(seq.ps.mnw)
bet.ps.mnw <- betadisper(dist.ps.mnw,samdf.ps.mnw$zone)
anova(bet.ps.mnw) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     1 0.2608 0.26076  1.9915 0.1692
## Residuals 28 3.6661 0.13093
plot(bet.ps.mnw)

adonis(seq.ps.mnw ~ zone, data=samdf.ps.mnw, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.mnw ~ zone, data = samdf.ps.mnw, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1    1.9833 1.98334  7.6393 0.21435  0.002 **
## Residuals 28    7.2695 0.25962         0.78565          
## Total     29    9.2528                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Normalized

seq.ps.mnw.norm <- data.frame(otu_table(ps.mnw.norm))
samdf.ps.mnw.norm <- data.frame(sample_data(ps.mnw.norm))
row.names(seq.ps.mnw.norm)==row.names(samdf.ps.mnw.norm)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.ps.mnw.norm <- vegdist(seq.ps.mnw.norm)
bet.ps.mnw.norm <- betadisper(dist.ps.mnw.norm,samdf.ps.mnw.norm$zone)
anova(bet.ps.mnw.norm) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     1 0.5791 0.57914  2.8779 0.1009
## Residuals 28 5.6346 0.20124
plot(bet.ps.mnw.norm)

adonis(seq.ps.mnw.norm ~ zone, data=samdf.ps.mnw.norm, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.mnw.norm ~ zone, data = samdf.ps.mnw.norm,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1    2.0483  2.0483  8.9327 0.24186  0.002 **
## Residuals 28    6.4204  0.2293         0.75814          
## Total     29    8.4687                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rel abundance

seq.ps.mnw.rel <- data.frame(otu_table(ps.mnw.rel))

dist.ps.mnw <- vegdist(seq.ps.mnw.rel)
bet.ps.mnw <- betadisper(dist.ps.mnw,samdf.ps.mnw$zone)
anova(bet.ps.mnw) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     1 0.5375 0.53753  2.6532 0.1145
## Residuals 28 5.6727 0.20259
permutest(bet.ps.mnw, permutations = 999,pairwise=F)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.5375 0.53753 2.6532    999  0.113
## Residuals 28 5.6727 0.20259
plot(bet.ps.mnw) #ns

adonis(seq.ps.mnw.rel ~ zone, data=samdf.ps.mnw, permutations=999) #sig p < 0.01
## 
## Call:
## adonis(formula = seq.ps.mnw.rel ~ zone, data = samdf.ps.mnw,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    2.0437 2.04367  8.7878 0.23888  0.001 ***
## Residuals 28    6.5116 0.23256         0.76112           
## Total     29    8.5553                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Mo’orea SE

seq.ps.mse <- data.frame(otu_table(ps.mse))
samdf.ps.mse <- data.frame(sample_data(ps.mse))
row.names(seq.ps.mse)==row.names(samdf.ps.mse)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.ps.mse <- vegdist(seq.ps.mse)
bet.ps.mse <- betadisper(dist.ps.mse,samdf.ps.mse$zone)
anova(bet.ps.mse) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## Groups     1 0.42925 0.42925  6.6824 0.01484 *
## Residuals 30 1.92711 0.06424                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse)

adonis(seq.ps.mse ~ zone, data=samdf.ps.mse, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.mse ~ zone, data = samdf.ps.mse, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## zone       1    0.9283 0.92830  3.2027 0.09646  0.029 *
## Residuals 30    8.6954 0.28985         0.90354         
## Total     31    9.6237                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Normalized

seq.ps.mse.norm <- data.frame(otu_table(ps.mse.norm))
samdf.ps.mse.norm <- data.frame(sample_data(ps.mse.norm))
row.names(seq.ps.mse.norm)==row.names(samdf.ps.mse.norm)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.ps.mse.norm <- vegdist(seq.ps.mse.norm)
bet.ps.mse.norm <- betadisper(dist.ps.mse.norm,samdf.ps.mse.norm$zone)
anova(bet.ps.mse.norm) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value  Pr(>F)   
## Groups     1 1.1009 1.10094  8.0701 0.00801 **
## Residuals 30 4.0927 0.13642                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse.norm)

adonis(seq.ps.mse.norm ~ zone, data=samdf.ps.mse.norm, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.mse.norm ~ zone, data = samdf.ps.mse.norm,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## zone       1    0.7813 0.78125  2.9528 0.08961  0.065 .
## Residuals 30    7.9375 0.26458         0.91039         
## Total     31    8.7188                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rel abundance

seq.ps.mse.rel <- data.frame(otu_table(ps.mse.rel))

dist.ps.mse <- vegdist(seq.ps.mse.rel)
bet.ps.mse <- betadisper(dist.ps.mse,samdf.ps.mse$zone)
anova(bet.ps.mse) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value  Pr(>F)   
## Groups     1 1.1009 1.10094  8.0701 0.00801 **
## Residuals 30 4.0927 0.13642                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permutest(bet.ps.mse, permutations = 999,pairwise=F)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq Mean Sq      F N.Perm Pr(>F)   
## Groups     1 1.1009 1.10094 8.0701    999  0.008 **
## Residuals 30 4.0927 0.13642                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse) #ns

adonis(seq.ps.mse.rel ~ zone, data=samdf.ps.mse, permutations=999) #sig p < 0.01
## 
## Call:
## adonis(formula = seq.ps.mse.rel ~ zone, data = samdf.ps.mse,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## zone       1    0.7813 0.78125  2.9528 0.08961  0.085 .
## Residuals 30    7.9375 0.26458         0.91039         
## Total     31    8.7188                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tahiti NW

seq.ps.tnw <- data.frame(otu_table(ps.tnw))
samdf.ps.tnw <- data.frame(sample_data(ps.tnw))
row.names(seq.ps.tnw)==row.names(samdf.ps.tnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE
dist.ps.tnw <- vegdist(seq.ps.tnw)
bet.ps.tnw <- betadisper(dist.ps.tnw,samdf.ps.tnw$zone)
anova(bet.ps.tnw) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.04729 0.047288  0.7298 0.3999
## Residuals 29 1.87902 0.064794
plot(bet.ps.tnw)

adonis(seq.ps.tnw ~ zone, data=samdf.ps.tnw, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.tnw ~ zone, data = samdf.ps.tnw, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.3652 0.36515 0.96248 0.03212  0.386
## Residuals 29   11.0021 0.37938         0.96788       
## Total     30   11.3673                 1.00000

Normalized

seq.ps.tnw.norm <- data.frame(otu_table(ps.tnw.norm))
samdf.ps.tnw.norm <- data.frame(sample_data(ps.tnw.norm))
row.names(seq.ps.tnw.norm)==row.names(samdf.ps.tnw.norm)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE
dist.ps.tnw.norm <- vegdist(seq.ps.tnw.norm)
bet.ps.tnw.norm <- betadisper(dist.ps.tnw.norm,samdf.ps.tnw.norm$zone)
## Warning in betadisper(dist.ps.tnw.norm, samdf.ps.tnw.norm$zone): some squared
## distances are negative and changed to zero
anova(bet.ps.tnw.norm) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.0801 0.080086  0.5664 0.4577
## Residuals 29 4.1001 0.141384
plot(bet.ps.tnw.norm)

adonis(seq.ps.tnw.norm ~ zone, data=samdf.ps.tnw.norm, permutations=999) #p<01**
## 
## Call:
## adonis(formula = seq.ps.tnw.norm ~ zone, data = samdf.ps.tnw.norm,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.3014 0.30144  0.8273 0.02774  0.471
## Residuals 29   10.5666 0.36437         0.97226       
## Total     30   10.8681                 1.00000

Rel abundance

seq.ps.tnw.rel <- data.frame(otu_table(ps.tnw.rel))

dist.ps.tnw <- vegdist(seq.ps.tnw.rel)
bet.ps.tnw <- betadisper(dist.ps.tnw,samdf.ps.tnw$zone)
## Warning in betadisper(dist.ps.tnw, samdf.ps.tnw$zone): some squared distances
## are negative and changed to zero
anova(bet.ps.tnw) #ns
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.0801 0.080086  0.5664 0.4577
## Residuals 29 4.1001 0.141384
permutest(bet.ps.tnw, permutations = 999,pairwise=F)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.0801 0.080086 0.5664    999  0.436
## Residuals 29 4.1001 0.141384
plot(bet.ps.tnw) #ns

adonis(seq.ps.tnw.rel ~ zone, data=samdf.ps.tnw, permutations=999) #sig p < 0.01
## 
## Call:
## adonis(formula = seq.ps.tnw.rel ~ zone, data = samdf.ps.tnw,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.3014 0.30144  0.8273 0.02774  0.474
## Residuals 29   10.5666 0.36437         0.97226       
## Total     30   10.8681                 1.00000

All syms

seq.all <- data.frame(otu_table(ps.all))
samdf.ps.all <- data.frame(sample_data(ps.all))
row.names(seq.all)==row.names(samdf.ps.all)

dist.all <- vegdist(seq.all)
bet.mnw.c <- betadisper(dist.all,samdf.ps.all$zone)
anova(bet.mnw.c) #ns
plot(bet.mnw.c) #very much overlap, not sig
adonis(otu.mnw.c ~ site*zone, data=samdf.mnw.c, permutations=999)
seq.a <- select(seq.all,contains('A')) 
#get rid of column 29 - it's a C
seq.a.clean <- seq.a[,1:28]

seq.c <- select(seq.all,contains('C')) 
#get rid of column 1, it's an A
seq.c.clean <- seq.c[,2:129]

seq.ac <- seq.all %>%
  select(contains('A')) %>%
  select(contains('C'))
#just those two

seq.ac <- select(seq.all,!contains(c('C','A'))) 
#there's a B1 in there haha

seq.c.clean2 <- seq.c.clean[!rowSums(seq.c.clean)==0,]

ps.all.c <- phyloseq(otu_table(as.matrix(seq.c.clean2), taxa_are_rows=FALSE),
               sample_data(samdf))

ps.all.c

seq.a.clean2 <- seq.a.clean[!rowSums(seq.a.clean)==0,]

ps.all.a <- phyloseq(otu_table(as.matrix(seq.a.clean2), taxa_are_rows=FALSE),
               sample_data(samdf))

ps.all.a

#stats time - CLADOCOPIUM
dist.c <- vegdist(seq.c.clean2)
samdf.ps.c <- data.frame(sample_data(ps.all.c))
row.names(samdf.ps.c)==row.names(seq.c.clean2)

bet.c <- betadisper(dist.c,samdf.ps.c$site)
anova(bet.c) #ns
plot(bet.c) #very much overlap, not sig

adonis(seq.c.clean2 ~ site/zone, data=samdf.ps.c, permutations=999)

pairwise.adonis(seq.c.clean2, factors = samdf.ps.c$site, permutations = 999)
#Tahiti different from other two

ps.mnw.c <- subset_samples(ps.all.c,site=="MNW")
otu.mnw.c <- data.frame(otu_table(ps.mnw.c))
samdf.mnw.c <- data.frame(sample_data(ps.mnw.c))
row.names(otu.mnw.c)==row.names(samdf.mnw.c)

dist.mnw.c <- vegdist(otu.mnw.c)
bet.mnw.c <- betadisper(dist.mnw.c,samdf.mnw.c$zone)
anova(bet.mnw.c) #ns
plot(bet.mnw.c) #very much overlap, not sig
adonis(otu.mnw.c ~ zone, data=samdf.mnw.c, permutations=999)

3.3 Find “most frequent” sym

Just ran once to get the info

find.top.asv <- function(x,num){
  require(phyloseq)
  require(magrittr)
  
  otu <- otu_table(x)
  tax <- tax_table(x)
  j1 <- apply(otu,1,sort,index.return=T, decreasing=T) # modifying which.max to return a list of sorted index
  j2 <- lapply(j1,'[[',"ix") # select for index
  
  l <- data.frame(unique(tax@.Data[unlist(j2),]))
  m <- data.frame(otu@.Data[,unique(unlist(j2))])
  n <- apply(m,1,sort,index.return=T, decreasing=T) %>%
    lapply('[[',"ix") %>%  # Extract index
    lapply(head,n=num) # This to returns the top x tax

  p <- list()
  for(i in 1:length(n)){
    p[[i]]<- colnames(m)[n[[i]]]
  }
  m$taxa <- p
  return(m)
}

top.asvs <- find.top.asv(ps,1)
top.asvs$taxa <- as.character(top.asvs$taxa)

#write.csv(top.asvs,file="top_sym.csv",row.names=TRUE)
table <- read.csv("silly_its_table.csv",header=T)

heatmap(as.matrix(table[,2:11]), labRow=table[,1],rowv=NA)
## Warning in plot.window(...): "rowv" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "rowv" is not a graphical parameter
## Warning in title(...): "rowv" is not a graphical parameter